1 Background

We compare the inference results from TMB, aghq and tmbstan. Import these inference results as follows:

tmb <- readRDS("depends/tmb.rds")
aghq <- readRDS("depends/aghq.rds")
tmbstan <- readRDS("depends/tmbstan.rds")

For more information about the conditions under which these results were generated, see:

depends <- yaml::read_yaml("orderly.yml")$depends

for(i in seq_along(depends)) {
  report_name <- names(depends[[i]])
  print(paste0("Inference results obtained from ", report_name, " with the query ", depends[[i]][[report_name]]$id))
  report_id <- orderly::orderly_search(query = depends[[i]][[report_name]]$id, report_name)
  print(paste0("Obtained report had ID ", report_id, " and was run with the following parameters:"))
  print(orderly::orderly_info(report_id, report_name)$parameters)
}
## [1] "Inference results obtained from naomi-simple_fit with the query latest(parameter:tmb == TRUE)"
## [1] "Obtained report had ID 20230114-173843-50cf86d7 and was run with the following parameters:"
## $tmb
## [1] TRUE
## 
## $aghq
## [1] FALSE
## 
## $k
## [1] 2
## 
## $tmbstan
## [1] FALSE
## 
## $niter
## [1] 1000
## 
## $nthin
## [1] 1
## 
## $nsample
## [1] 1000
## 
## [1] "Inference results obtained from naomi-simple_fit with the query latest(parameter:aghq == TRUE)"
## [1] "Obtained report had ID 20230112-210550-b5292d7e and was run with the following parameters:"
## $aghq
## [1] TRUE
## 
## $tmb
## [1] FALSE
## 
## $k
## [1] 2
## 
## $tmbstan
## [1] FALSE
## 
## $niter
## [1] 1000
## 
## $nsample
## [1] 1000
## 
## [1] "Inference results obtained from naomi-simple_fit with the query latest(parameter:tmbstan == TRUE)"
## [1] "Obtained report had ID 20230114-142916-efabd65d and was run with the following parameters:"
## $tmbstan
## [1] TRUE
## 
## $niter
## [1] 20000
## 
## $nthin
## [1] 20
## 
## $tmb
## [1] FALSE
## 
## $aghq
## [1] FALSE
## 
## $k
## [1] 2
## 
## $nsample
## [1] 1000

All of the possible parameter names are as follows:

unique(names(tmb$fit$obj$env$par))
##  [1] "beta_rho"             "beta_alpha"           "beta_lambda"          "beta_anc_rho"         "beta_anc_alpha"       "logit_phi_rho_x"     
##  [7] "log_sigma_rho_x"      "logit_phi_rho_xs"     "log_sigma_rho_xs"     "logit_phi_rho_a"      "log_sigma_rho_a"      "logit_phi_rho_as"    
## [13] "log_sigma_rho_as"     "log_sigma_rho_xa"     "u_rho_x"              "us_rho_x"             "u_rho_xs"             "us_rho_xs"           
## [19] "u_rho_a"              "u_rho_as"             "logit_phi_alpha_x"    "log_sigma_alpha_x"    "logit_phi_alpha_xs"   "log_sigma_alpha_xs"  
## [25] "logit_phi_alpha_a"    "log_sigma_alpha_a"    "logit_phi_alpha_as"   "log_sigma_alpha_as"   "log_sigma_alpha_xa"   "u_alpha_x"           
## [31] "us_alpha_x"           "u_alpha_xs"           "us_alpha_xs"          "u_alpha_a"            "u_alpha_as"           "u_alpha_xa"          
## [37] "OmegaT_raw"           "log_betaT"            "log_sigma_lambda_x"   "ui_lambda_x"          "log_sigma_ancrho_x"   "log_sigma_ancalpha_x"
## [43] "ui_anc_rho_x"         "ui_anc_alpha_x"       "log_sigma_or_gamma"   "log_or_gamma"

2 Time taken

data.frame(
  "TMB" = tmb$time,
  "aghq" = aghq$time,
  "tmbstan" = tmbstan$time
)

3 Histogram

histogram_plot("beta_anc_rho")

4 KS

4.1 Individual parameters

ks_helper <- function(par) to_ks_df(par) %>% ks_plot(par = par)

ks_helper("beta")

ks_helper("logit")

ks_helper("log_sigma")

ks_helper("u_rho_x")

ks_helper("u_rho_xs")

ks_helper("us_rho_x")

ks_helper("us_rho_xs")

ks_helper("u_rho_a")

ks_helper("u_rho_as")

ks_helper("u_alpha_x")

ks_helper("u_alpha_xs")

ks_helper("us_alpha_x")

ks_helper("us_alpha_xs")

ks_helper("u_alpha_a")

ks_helper("u_alpha_as")

ks_helper("u_alpha_xa")

ks_helper("ui_anc_rho_x")

ks_helper("ui_anc_alpha_x")

ks_helper("log_or_gamma")

4.2 Summary table

ks_summary <- lapply(unique(names(tmb$fit$obj$env$par)), function(x) {
  to_ks_df(x) %>%
    group_by(method) %>%
    summarise(ks = mean(ks), par = x)
}) %>%
  bind_rows() %>%
  pivot_wider(names_from = "method", values_from = "ks") %>%
  rename(
    "Parameter" = "par",
    "KS(aghq, tmbstan)" = "aghq",
    "KS(TMB, tmbstan)" = "TMB",
  )

ks_summary %>%
  gt::gt() %>%
  gt::fmt_number(
    columns = starts_with("KS"),
    decimals = 3
  )
Parameter KS(aghq, tmbstan) KS(TMB, tmbstan)
beta_rho 0.094 0.096
beta_alpha 0.110 0.104
beta_lambda 0.053 0.074
beta_anc_rho 0.076 0.072
beta_anc_alpha 0.036 0.031
logit_phi_rho_x 0.425 0.536
log_sigma_rho_x 0.589 0.646
logit_phi_rho_xs 0.387 0.510
log_sigma_rho_xs 0.822 0.613
logit_phi_rho_a 0.828 0.552
log_sigma_rho_a 0.448 0.585
logit_phi_rho_as 0.823 0.569
log_sigma_rho_as 0.298 0.589
log_sigma_rho_xa 0.441 0.669
u_rho_x 0.096 0.094
us_rho_x 0.066 0.063
u_rho_xs 0.125 0.122
us_rho_xs 0.040 0.040
u_rho_a 0.092 0.095
u_rho_as 0.093 0.095
logit_phi_alpha_x 0.278 0.532
log_sigma_alpha_x 0.706 0.557
logit_phi_alpha_xs 0.291 0.552
log_sigma_alpha_xs 0.675 0.540
logit_phi_alpha_a 0.784 0.524
log_sigma_alpha_a 0.297 0.541
logit_phi_alpha_as 0.734 0.515
log_sigma_alpha_as 0.232 0.514
log_sigma_alpha_xa 0.708 0.627
u_alpha_x 0.100 0.096
us_alpha_x 0.089 0.091
u_alpha_xs 0.107 0.102
us_alpha_xs 0.105 0.106
u_alpha_a 0.091 0.083
u_alpha_as 0.119 0.109
u_alpha_xa 0.072 0.069
OmegaT_raw 0.196 0.518
log_betaT 0.166 0.689
log_sigma_lambda_x 0.547 0.737
ui_lambda_x 0.159 0.162
log_sigma_ancrho_x 0.714 0.504
log_sigma_ancalpha_x 0.705 0.519
ui_anc_rho_x 0.056 0.056
ui_anc_alpha_x 0.066 0.065
log_sigma_or_gamma 0.500 0.524
log_or_gamma 0.061 0.061
ggplot(ks_summary, aes(x = `KS(TMB, tmbstan)`, y = `KS(aghq, tmbstan)`)) +
  geom_point(alpha = 0.5) +
  xlim(0, 1) +
  ylim(0, 1) +
  geom_abline(intercept = 0, slope = 1, linetype = "dashed") +
  labs(x = "KS(aghq, tmbstan)", y = "KS(TMB, tmbstan)") +
  theme_minimal() 

ks_summary %>%
  mutate(`KS difference` = `KS(TMB, tmbstan)` - `KS(aghq, tmbstan)`) %>%
  ggplot(aes(x = `KS difference`)) +
    geom_boxplot(width = 0.5) +
    coord_flip() +
    labs(x = "KS(TMB, tmbstan) - KS(aghq, tmbstan)") +
    theme_minimal()

4.3 Investigation into large KS values

The following parameters have KS values greater than 0.5 in both dimensions.

(big_ks <- ks_summary %>%
  filter(`KS(aghq, tmbstan)` > 0.5, `KS(TMB, tmbstan)` > 0.5) %>%
  pull(Parameter))
##  [1] "log_sigma_rho_x"      "log_sigma_rho_xs"     "logit_phi_rho_a"      "logit_phi_rho_as"     "log_sigma_alpha_x"    "log_sigma_alpha_xs"  
##  [7] "logit_phi_alpha_a"    "logit_phi_alpha_as"   "log_sigma_alpha_xa"   "log_sigma_lambda_x"   "log_sigma_ancrho_x"   "log_sigma_ancalpha_x"

Let’s look into this further by plotting the histograms:

lapply(big_ks, histogram_plot)
## [[1]]

## 
## [[2]]

## 
## [[3]]

## 
## [[4]]

## 
## [[5]]

## 
## [[6]]

## 
## [[7]]

## 
## [[8]]

## 
## [[9]]

## 
## [[10]]

## 
## [[11]]

## 
## [[12]]

5 MMD

#' To write!

6 PSIS

Suppose we have two sets of samples from the posterior. For each sample we are going to want to evaluate the log-likelihood, so that we can calculate the log-likelihood ratios. We can extract the TMB objective function for the log-likelihood as follows:

tmb$fit$obj$fn()
## [1] NaN